package com.yym.cloudweb.crawl.station.dadianjing;

import org.jsoup.nodes.Element;
import org.seimicrawler.xpath.JXDocument;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for the "dadianjing" station crawl.
 *
 * <p>Currently {@link #process(Page)} only dumps the downloaded HTML to
 * standard output together with the id of the thread handling the page;
 * no fields are extracted from the page yet.
 */
public class DadianjingPageProcessor implements PageProcessor {

    /**
     * Crawl configuration handed to WebMagic: retry times 3, sleep time 1000
     * and timeout 10000. Never reassigned, hence {@code final}.
     */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000);

    /**
     * Returns the crawl configuration used by this processor.
     *
     * @return the {@link Site} instance built when this processor was created
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Prints the id of the current thread and the full HTML of the page to
     * standard output.
     *
     * @param page the downloaded page supplied by WebMagic
     */
    @Override
    public void process(Page page) {
        Thread currentThread = Thread.currentThread();
        System.out.println("当前线程id: "+currentThread.getId() + " 结果: "+page.getHtml().toString());
    }

    /**
     * Evaluates an XPath expression against a single element and returns the
     * first match as a string.
     *
     * <p>The element is serialized with {@code toString()} and re-parsed into a
     * {@link JXDocument}, so the expression is evaluated against that new
     * document rather than against the element's original document.
     *
     * @param element the element to search in
     * @param xpath   the XPath expression to evaluate
     * @return the string form of the first match, or {@code null} when the
     *         expression matches nothing
     */
    private static String getValByXpath(Element element, String xpath){
        JXDocument eleJxDocument = JXDocument.create(element.toString());
        Object firstMatch = eleJxDocument.selOne(xpath);
        // selOne yields null when nothing matches; calling toString() on it
        // unconditionally would throw a NullPointerException.
        return firstMatch == null ? null : firstMatch.toString();
    }

}
